In this workshop we will show how TCGAbiolinks and Moonlight can be used to integrate multi-omics data from different consortia, to reproduce the six immune subtypes from the TCGA PanCancer Atlas, and how features (Immune Subtypes, Oncogenic Processes, Driver Genes and Stemness) can be used by end users to expand their understanding of their own unpublished data.
The workshop is organized in 4 subsections:
TCGAbiolinks: You can easily query, download and prepare multi-omics data from GDC: . Gene expression . Copy number . Protein expression (RPPA) . Methylation . Clinical data . microRNA
You can easily search TCGA samples, download and prepare a matrix of gene expression.
# Download and prepare a small TCGA-BRCA RNA-Seq gene expression data set
# (10 primary tumor + 10 solid tissue normal samples).

# Directory used to store the downloaded GDC files and to read them back.
# A value already defined in the session is kept; otherwise fall back to
# "GDCdata", the default download directory of GDCdownload().
#DataDir <- "~/Dropbox (Personal)/Umiami/TCGAanalysis/GDCdata/TCGA-BRCA"
if (!exists("DataDir")) {
  DataDir <- "GDCdata"
}

# Firstly we install TCGAbiolinks from bioconductor (only when it is missing,
# so that re-running the chunk does not reinstall the package every time)
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("TCGAbiolinks", quietly = TRUE)) {
  BiocManager::install("TCGAbiolinks")
}
# library() stops with an error if the package is unavailable, whereas
# require() would only return FALSE and let the script fail later on.
library(TCGAbiolinks)

cancerType <- "BRCA"

# Query platform Illumina HiSeq to list every available barcode
query <- GDCquery(project = paste0("TCGA-", cancerType),
                  data.category = "Gene expression",
                  data.type = "Gene expression quantification",
                  experimental.strategy = "RNA-Seq",
                  platform = "Illumina HiSeq",
                  file.type = "results",
                  legacy = TRUE)

# We select 10 tumor and 10 normal samples.
# head() is used instead of [1:10] so that no NA barcodes are produced when
# fewer than 10 samples of a type are available.
Sample_sel <- query$results[[1]]$cases
Sample_sel_TP <- TCGAquery_SampleTypes(barcode = Sample_sel, typesample = "TP")
Sample_sel_NT <- TCGAquery_SampleTypes(barcode = Sample_sel, typesample = "NT")
Sample_sel_short <- c(head(Sample_sel_TP, 10), head(Sample_sel_NT, 10))

# we need to create a new query with the selected barcodes
query_down <- GDCquery(project = paste0("TCGA-", cancerType),
                       data.category = "Gene expression",
                       data.type = "Gene expression quantification",
                       experimental.strategy = "RNA-Seq",
                       platform = "Illumina HiSeq",
                       file.type = "results",
                       barcode = Sample_sel_short,
                       legacy = TRUE)

# Download the files of the selected barcodes into DataDir
GDCdownload(query_down, directory = DataDir)

# Prepare expression matrix with geneID in the rows and samples (barcode) in the columns
# rsem.genes.results as values
BRCARnaseqSE <- GDCprepare(query_down, directory = DataDir)

# For gene expression, if you need a correlation boxplot and an
# array-array intensity correlation (AAIC) plot to define outliers you can run
dataPrep <- TCGAanalyze_Preprocessing(BRCARnaseqSE)
The result is shown below:
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'R.oo':
## method from
## throw.default R.methodsS3
| TCGA-AQ-A0Y5-01A-11R-A14M-07 | TCGA-A2-A0CV-01A-31R-A115-07 | |
|---|---|---|
| PTCD2|79810 | 874 | 447 |
| FZD8|8325 | 199 | 1505 |
| PRLHR|2834 | 0 | 0 |
| SIGLECP3|284367 | 16 | 38 |
| CKLF|51192 | 642 | 1599 |
| IL20|50604 | 2 | 86 |
| CXorf22|170063 | 11 | 11 |
| LETM2|137994 | 70 | 1055 |
| AADACL2|344752 | 0 | 0 |
| C17orf73|55018 | 7 | 0 |
The result from TCGAanalyze_Preprocessing is shown below:
You can easily search TCGA samples, download and prepare a matrix of protein RPPA expression.
# Protein expression (RPPA) for TCGA-BRCA primary solid tumors:
# query the available cases, re-query restricted to those barcodes,
# then download and prepare the data.
# NOTE(review): PathDir is not defined in this chunk; it must already exist
# in the session before this code runs.
cancerType <- "BRCA"

# Arguments shared by the exploratory query and the download query
rppa_query_args <- list(
  project = paste0("TCGA-", cancerType),
  legacy = TRUE,
  data.category = "Protein expression",
  data.type = "Protein expression quantification",
  platform = "MDA_RPPA_Core",
  file.type = "expression",
  sample.type = c("Primary solid Tumor")
)

query.RPPA <- do.call(GDCquery, rppa_query_args)

# Barcodes of every case returned by the first query
samples.RPPA <- query.RPPA$results[[1]][["cases"]]

query.RPPA.down <- do.call(
  GDCquery,
  c(rppa_query_args, list(barcode = samples.RPPA))
)

GDCdownload(query.RPPA.down, directory = PathDir)
data.RPPA <- GDCprepare(query.RPPA.down, directory = PathDir)
You can easily search TCGA samples, download and prepare a matrix of CN segments.
# Copy number segments for the first 20 TCGA-BRCA primary solid tumors.
# NOTE(review): PathDir is not defined in this chunk; it must already exist
# in the session before this code runs.
library(TCGAbiolinks)

cancerType <- "BRCA"
# GDCquery() expects the full GDC project identifier ("TCGA-BRCA"), not the
# bare cancer type, so the prefix is added here exactly as in the gene
# expression and RPPA queries of this document.
project_id <- paste0("TCGA-", cancerType)

query.cnv <- GDCquery(project = project_id,
                      data.category = "Copy number variation",
                      legacy = TRUE,
                      data.type = "Copy number segmentation",
                      platform = "Affymetrix SNP Array 6.0",
                      file.type = "nocnv_hg19.seg",
                      sample.type = c("Primary solid Tumor"))

# Keep at most 20 barcodes; head() avoids NA entries when fewer are returned
samples.cnv <- head(query.cnv$results[[1]]$cases, 20)

query.cnv.down <- GDCquery(project = project_id,
                           data.category = "Copy number variation",
                           legacy = TRUE,
                           platform = "Affymetrix SNP Array 6.0",
                           data.type = "Copy number segmentation",
                           file.type = "nocnv_hg19.seg",
                           sample.type = c("Primary solid Tumor"),
                           barcode = samples.cnv)

GDCdownload(query.cnv.down,
            directory = PathDir)

data.cnv <- GDCprepare(query.cnv.down,
                       directory = PathDir)
data.cnv <- as.data.frame(data.cnv)
You can easily search TCGA samples, download and prepare a matrix of DNA methylation probes.
# DNA methylation (Illumina 450k) for the first 20 TCGA-BRCA primary solid
# tumors.
# NOTE(review): PathDir is not defined in this chunk; it must already exist
# in the session before this code runs.
library(TCGAbiolinks)

cancerType <- "BRCA"
# GDCquery() expects the full GDC project identifier ("TCGA-BRCA"), not the
# bare cancer type, so the prefix is added here exactly as in the gene
# expression and RPPA queries of this document.
project_id <- paste0("TCGA-", cancerType)

query.met <- GDCquery(project = project_id,
                      legacy = TRUE,
                      data.category = "DNA methylation",
                      platform = "Illumina Human Methylation 450",
                      sample.type = c("Primary solid Tumor"))

# Keep at most 20 barcodes; head() avoids NA entries when fewer are returned
samples.met <- head(query.met$results[[1]]$cases, 20)

query.met.down <- GDCquery(project = project_id,
                           legacy = TRUE,
                           data.category = "DNA methylation",
                           platform = "Illumina Human Methylation 450",
                           sample.type = c("Primary solid Tumor"),
                           barcode = samples.met)

GDCdownload(query.met.down,
            directory = PathDir)

data.met <- GDCprepare(query.met.down,
                       directory = PathDir)
You can easily search TCGA samples, download and prepare a matrix of Mutations.
# Somatic mutations (open access) for TCGA-BRCA primary solid tumors.
# Three queries are issued: one to list the available files, one restricted
# to the first listed file, and one restricted to that file's barcodes.
# NOTE(review): PathDir is not defined in this chunk; it must already exist
# in the session before this code runs.
cancerType <- "TCGA-BRCA"

# Arguments common to all three queries
mut_query_args <- list(
  project = cancerType,
  data.category = "Simple nucleotide variation",
  data.type = "Simple somatic mutation",
  access = "open",
  sample.type = c("Primary solid Tumor"),
  legacy = TRUE
)

query.mut <- do.call(GDCquery, mut_query_args)

# Check which MAF files are available
query.mut$results[[1]]$file_name

# Restrict the query to the first file of the listing above
first_listed_file <- query.mut$results[[1]]$file_name[1]
query.mut <- do.call(
  GDCquery,
  c(mut_query_args, list(file.type = first_listed_file))
)

samples.mut <- query.mut$results[[1]][["cases"]]

# The file name is read again from the restricted query before the final call
file_for_download <- query.mut$results[[1]]$file_name[1]
query.mut.down <- do.call(
  GDCquery,
  c(mut_query_args,
    list(barcode = samples.mut, file.type = file_for_download))
)

GDCdownload(query.mut.down, directory = PathDir)
data.mut <- GDCprepare(query.mut.down, directory = PathDir)
You can easily search TCGA samples, download and prepare a matrix of microRNA Expression
# microRNA expression quantification for TCGA-BRCA.
# NOTE(review): PathDir is not defined in this chunk; it must already exist
# in the session before this code runs.
cancerType <- "TCGA-BRCA"

query.miR <- GDCquery(project = cancerType,
                      data.category = "Gene expression",
                      data.type = "miRNA gene quantification",
                      platform = "Illumina HiSeq",
                      file.type = "hg19.mirbase20.mirna.quantification",
                      legacy = TRUE)

# Barcodes of every case returned by the first query
samples.miR <- query.miR$results[[1]]$cases

# The download query is restricted to the barcodes collected above.
# The original passed `TNBCsamplesmiRlong`, a variable that is never defined
# in this document, while `samples.miR` was computed but left unused.
query.miR.down <- GDCquery(project = cancerType,
                           data.category = "Gene expression",
                           data.type = "miRNA gene quantification",
                           platform = "Illumina HiSeq",
                           file.type = "hg19.mirbase20.mirna.quantification",
                           legacy = TRUE,
                           barcode = samples.miR)

GDCdownload(query.miR.down,
            directory = PathDir)

data.miR <- GDCprepare(query.miR.down,
                       directory = PathDir)
You can easily search TCGA samples, download and prepare a matrix of clinical data
# Clinical data for the TCGA project of the selected cancer type
cancerType <- "BRCA"
dataClin <- GDCquery_clinic(
  type = "clinical",
  project = sprintf("TCGA-%s", cancerType)
)
You can easily search GTEx samples, download and prepare a matrix of gene expression.
# Retrieve the recount2 GTEx data for brain tissue
data_gtex_brain <- TCGAquery_recount2(
  project = "gtex",
  tissue = "brain"
)
You can easily search IHEC Blueprint samples, download and prepare a matrix of gene expression.
# Build a gene expression matrix (genes x samples) from the IHEC BLUEPRINT
# samples available through DeepBlueR, and save it together with a
# sample-to-biosource mapping table.
library(DeepBlueR)
library(dplyr)

# List all BLUEPRINT samples
blueprint_samples <- deepblue_list_samples(
  extra_metadata = list("source" = "BLUEPRINT Epigenome"))

# Extract their ids
blueprint_samples_ids <- deepblue_extract_ids(blueprint_samples)

# Select gene expression data. We assign gene names using Gencode 19.
# The original also issued the same query with "gencode v22", but that result
# was immediately overwritten by this one, so only the v19 query is kept and
# every later reference uses the v19 gene-id column consistently.
gene_exprs_query <- deepblue_select_expressions(
  sample_ids = blueprint_samples_ids,
  expression_type = "gene",
  gene_model = "gencode v19")

# We request the data and define the output format
request <- deepblue_get_regions(
  query_id = gene_exprs_query,
  "@GENE_ID(gencode v19),FPKM,@BIOSOURCE,@SAMPLE_ID")

# We download the data
gene_regions <- deepblue_download_request_data(request)

# We retain a table mapping sample ids to biosources
sample_names <- dplyr::select(gene_regions, `@BIOSOURCE`, `@SAMPLE_ID`) %>%
  dplyr::distinct()

# We filter out duplicated gene entries, detected within one reference sample
genes_one_sample <- dplyr::filter(gene_regions, `@SAMPLE_ID` == "s10678")
gene_ids_one_sample <- genes_one_sample[["@GENE_ID(gencode v19)"]]
duplicated_genes <- gene_ids_one_sample[duplicated(gene_ids_one_sample)]

# We convert the gene expression from long format to a data frame with one
# column per sample and subsequently...
# (the filter must use the v19 column: it is the only gene-id column that the
# output format requested above produces)
genes_matrix <- dplyr::filter(
  gene_regions,
  !(`@GENE_ID(gencode v19)` %in% duplicated_genes)) %>%
  dplyr::select(-`@BIOSOURCE`) %>%
  tidyr::spread(key = `@SAMPLE_ID`, value = FPKM)

# ...to a numeric matrix.
# [[1]] extracts the gene ids as a plain vector whether the table is a
# data.frame, a tibble or a data.table.
genes <- genes_matrix[[1]]
genes_matrix <- data.matrix(genes_matrix[,-1])
rownames(genes_matrix) <- genes

### OUTPUT
### genes_matrix : The gene expression matrix for all 276 BLUEPRINT samples
### sample_names : A mapping table from sample id to cell type / biosource
save(genes_matrix, file = "IHEC_genes_matrix.Rdata")
save(sample_names, file = "IHEC_Sample_names.rdata")
You can easily search GEO samples, download and prepare a matrix of gene expression.
# Download the GEO series GSE30652 (platform GPL6947) and build a gene-level
# expression table: genes in rows, samples in columns.

# Install MoonlightR from GitHub only when it is missing, and do so BEFORE
# loading it (the original loaded the package first and then reinstalled it
# on every run).
library(devtools)
if (!requireNamespace("MoonlightR", quietly = TRUE)) {
  install_github("ibsquare/MoonlightR")
}
library(MoonlightR)
library(SummarizedExperiment)

dataNazor <- getDataGEO(GEOobject = "GSE30652",
                        platform = "GPL6947")

# Probe-level expression values plus the probe id and its mean across samples
GSE30652 <- as.data.frame(exprs(dataNazor))
GSE30652_non_norm <- cbind(ILMN = rownames(GSE30652),
                           IDmean = rowMeans(GSE30652),
                           GSE30652)

# Sample annotation: keep the accession and the cell type description
dataNazor_samples <- pData(dataNazor)
dataNazor_samples <- as.data.frame(dataNazor_samples)
dataNazor_samples <- subset(dataNazor_samples,
                            select = c("geo_accession","characteristics_ch1.2"))
colnames(dataNazor_samples)[2] <- "CellType"
dataNazor_samples$CellType <- gsub("cell type: ","",dataNazor_samples$CellType)
dataNazor_samples$CellType <- gsub(", undifferentiated","",dataNazor_samples$CellType)

# Probe annotation: probe id and gene symbol
GPL6947_13512 <- fData(dataNazor)
GPL6947_13512_annot <- as.data.frame(GPL6947_13512)
GPL6947_13512_annot <- subset(GPL6947_13512_annot,
                              select = c("ID","Gene.symbol"))

GSE30652_merge <- merge(x = GPL6947_13512_annot,
                        y = GSE30652_non_norm,
                        by.x = "ID",
                        by.y = "ILMN")

# For genes measured by several probes keep the probe with the highest mean:
# sort by decreasing mean, then keep the first occurrence of each symbol
GSE30652_merge <- GSE30652_merge[order(GSE30652_merge$IDmean,decreasing = TRUE),]
GSE30652_merge <- GSE30652_merge[!duplicated(GSE30652_merge$Gene.symbol),]

NazorMatrix <- GSE30652_merge
rownames(NazorMatrix) <- NazorMatrix$Gene.symbol

# Select the sample columns by NAME. as.character() guards against
# geo_accession being a factor, in which case `[` would index by the integer
# level codes and silently pick the wrong columns.
NazorMatrix <- NazorMatrix[,as.character(dataNazor_samples$geo_accession)]
# Stemness index (mRNASi) validation on Nazor's data set.
# The gene expression matrix (genes in rows, samples in columns) was prepared
# beforehand with Moonlight's getDataGEO function.
# NOTE(review): PCBC_stemSig, NazorMatrix and dataNazor_samples must already
# be available in the session.
library(ggplot2)
library(ggpubr)
library(forcats)

TCGA_mRNA_StemScoreTable <- TCGAanalyze_Stemness(stemSig = PCBC_stemSig,
                                                 dataGE = NazorMatrix)
tab_mRNASi <- TCGA_mRNA_StemScoreTable

# Attach the cell type of every sample to its stemness score
tab_mRNASi_merged <- merge(x = tab_mRNASi,
                           y = dataNazor_samples,
                           by.x = "Sample",
                           by.y = "geo_accession")
colnames(tab_mRNASi_merged)[3] <- "mRNASi"

# Shorten the cell type labels used in the legend; labels that are not listed
# here are left unchanged
cell_type_labels <- c(
  "embryonic stem cell" = "ES",
  "induced pluripotent stem cell" = "IPS",
  "parthenogentic embryonic stem cell" = "Parthenogentic",
  "Somatic.Primary" = "Primary",
  "Somatic.Tissue" = "Tissue"
)
to_relabel <- tab_mRNASi_merged$CellType %in% names(cell_type_labels)
tab_mRNASi_merged$CellType[to_relabel] <- unname(
  cell_type_labels[as.character(tab_mRNASi_merged$CellType[to_relabel])]
)

# Sort by increasing stemness; fct_inorder() below makes the bars follow
# this row order instead of the alphabetical order of the sample ids
tab_mRNASi_merged <- tab_mRNASi_merged[order(tab_mRNASi_merged$mRNASi, decreasing = FALSE),]

# One bar per sample, coloured by cell type. The original also added
# scale_colour_gradient2(), which had no effect because no colour aesthetic
# is mapped, so it is dropped here.
p <- ggplot(data = tab_mRNASi_merged,
            aes(x = fct_inorder(Sample), y = mRNASi, fill = CellType)) +
  geom_bar(stat = "identity") +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  scale_fill_manual("legend", values = c("Primary" = "orange",
                                         "Parthenogentic" = "blue",
                                         "Tissue" = "darkgreen",
                                         "ES" = "red",
                                         "IPS" = "pink")) +
  theme(legend.position = "bottom")

# Arguments are spelled out in full: the original `file =` only worked
# through partial matching of `filename`
ggsave(filename = "Validation_mRNASi_Nazor.png", plot = p, width = 6, height = 6)
The result from TCGAanalyze_Stemness validation in Nazor’s Dataset is shown below:
# Stemness index (mRNASi) for TCGA-BRCA: tumor vs normal, and across the
# molecular subtypes of the primary tumors.

# Load a previously generated pan-cancer gene expression matrix
# (genes in rows, samples in columns); the code below reads it as `dataFilt`
load("~/Dropbox (Personal)/Umiami/TCGAanalysis/Stemness/dataFilt_panCancer33new.Rdata")
curCancer <- "BRCA"

# Table with barcodes and molecular subtypes of the pan-cancer cohorts
dataSubt_PanCancer <- PanCancerAtlas_subtypes()

# Keep the breast cancer rows and the samples present in both tables
is_cur_cancer <- dataSubt_PanCancer$cancer.type %in% curCancer
dataSubt_curCancer <- dataSubt_PanCancer[is_cur_cancer, ]
commonSamples <- intersect(dataSubt_curCancer$pan.samplesID,
                           colnames(dataFilt))
dataFilt_curCancer <- dataFilt[, commonSamples]

load("~/Dropbox (Personal)/Umiami/Github/TCGAbiolinks/data/PCBC_stemSig.rda")
TCGA_mRNA_StemScoreTable <- TCGAanalyze_Stemness(
  stemSig = PCBC_stemSig,
  dataGE = dataFilt_curCancer
)
colnames(TCGA_mRNA_StemScoreTable)[1] <- "barcode"

# Barcodes of the normal (NT) and primary tumor (TP) samples.
# Other sample types (TM, TAM, TB) are not annotated here.
sampleNT <- TCGAquery_SampleTypes(
  barcode = TCGA_mRNA_StemScoreTable$barcode,
  typesample = "NT"
)
sampleTP <- TCGAquery_SampleTypes(
  barcode = TCGA_mRNA_StemScoreTable$barcode,
  typesample = "TP"
)

# Index the table by barcode so the sample type can be assigned by row name
rownames(TCGA_mRNA_StemScoreTable) <- TCGA_mRNA_StemScoreTable$barcode
TCGA_mRNA_StemScoreTable[sampleNT, "SampleType"] <- "NT"
TCGA_mRNA_StemScoreTable[sampleTP, "SampleType"] <- "TP"

# Attach the molecular subtype annotation to the stemness scores
tab_mRNASi <- TCGA_mRNA_StemScoreTable
tab_mRNASi_merged <- merge(
  x = tab_mRNASi,
  y = dataSubt_PanCancer,
  by.x = "barcode",
  by.y = "pan.samplesID"
)

require(ggplot2)
require(ggpubr)
colnames(tab_mRNASi_merged)[3] <- "mRNASi"

# Boxplot of mRNASi by sample type (jittered points drawn under the boxes)
p <- ggplot(tab_mRNASi_merged,
            aes(x = SampleType, y = mRNASi, fill = SampleType)) +
  theme_classic() +
  theme(legend.position = "none") +
  rotate_x_text(45) +
  geom_jitter(shape = 16, position = position_jitter(0.2), color = "black") +
  geom_boxplot(position = position_dodge(1), outlier.colour = NA) +
  theme(text = element_text(size = 20))
ggsave(p, filename = "TCGA_BRCA_mRNASi_TP_NT.png", width = 5, height = 6)

# Same plot restricted to primary tumors, grouped by molecular subtype
is_primary_tumor <- tab_mRNASi_merged$SampleType %in% "TP"
tab_mRNASi_merged_TP <- tab_mRNASi_merged[is_primary_tumor, ]
p <- ggplot(tab_mRNASi_merged_TP,
            aes(x = Subtype_Selected, y = mRNASi, fill = Subtype_Selected)) +
  theme_classic() +
  theme(legend.position = "none") +
  rotate_x_text(45) +
  geom_jitter(shape = 16, position = position_jitter(0.2), color = "black") +
  geom_boxplot(position = position_dodge(1), outlier.colour = NA) +
  theme(text = element_text(size = 20))
ggsave(p, filename = "TCGA_BRCA_mRNASi_subtypes.png", width = 5, height = 6)
The result from TCGAanalyze_Stemness is shown below:
The result from TCGAanalyze_Stemness with molecular subtypes is shown below:
# Stemness index (mRNASi) across the molecular subtypes of glioma (LGG + GBM).
# NOTE(review): `dataFilt` is not created in this chunk; it must already be
# loaded in the session.
curCancer <- c("LGG","GBM")

# Table with barcodes and molecular subtypes of the pan-cancer cohorts
require(TCGAbiolinks)
dataSubt_PanCancer <- PanCancerAtlas_subtypes()

# Keep the LGG and GBM rows of the subtype table
is_cur_cancer <- dataSubt_PanCancer$cancer.type %in% curCancer
dataSubt_curCancer <- dataSubt_PanCancer[is_cur_cancer, ]

# Expression columns are matched on the first 12 characters of their barcode
all_barcodes <- colnames(dataFilt)
in_subtype_table <- substr(all_barcodes, 1, 12) %in%
  dataSubt_curCancer$pan.samplesID
sampleCurCancer <- all_barcodes[in_subtype_table]
dataFilt_curCancer <- dataFilt[, sampleCurCancer]

load("~/Dropbox (Personal)/Umiami/Github/TCGAbiolinks/data/PCBC_stemSig.rda")
TCGA_mRNA_StemScoreTable <- TCGAanalyze_Stemness(
  stemSig = PCBC_stemSig,
  dataGE = dataFilt_curCancer
)
colnames(TCGA_mRNA_StemScoreTable)[1] <- "barcode"

# Add the 12-character barcode as the first column; it is the merge key
tab_mRNASi <- TCGA_mRNA_StemScoreTable
tab_mRNASi <- cbind(
  barcode12 = substr(tab_mRNASi$barcode, 1, 12),
  tab_mRNASi
)
tab_mRNASi$barcode12 <- as.character(tab_mRNASi$barcode12)

tab_mRNASi_merged <- merge(
  x = tab_mRNASi,
  y = dataSubt_PanCancer,
  by.x = "barcode12",
  by.y = "pan.samplesID"
)

require(ggplot2)
require(ggpubr)
colnames(tab_mRNASi_merged)[4] <- "mRNASi"

# Boxplot of mRNASi by molecular subtype (jittered points under the boxes)
p <- ggplot(tab_mRNASi_merged,
            aes(x = Subtype_Selected, y = mRNASi, fill = Subtype_Selected)) +
  theme_classic() +
  theme(legend.position = "none") +
  rotate_x_text(45) +
  geom_jitter(shape = 16, position = position_jitter(0.2), color = "black") +
  geom_boxplot(position = position_dodge(1), outlier.colour = NA) +
  theme(text = element_text(size = 20))
ggsave(p, filename = "TCGA_LGG_GBM_mRNASi_subtypes.png", width = 5, height = 6)
The result from TCGAanalyze_Stemness for LGG and GBM is shown below:
# Survival analysis in glioma: samples are split into low / moderate / high
# mRNASi groups using the 1/3 and 2/3 quantiles of the stemness index.
# NOTE(review): `tab_mRNASi_merged` must already hold the LGG/GBM stemness
# table with the columns `mRNASi` and `barcode12`.

# Every sample starts in the moderate group
tab_mRNASi_merged <- cbind(
  mRNASi_level = rep("mRNASi mod", nrow(tab_mRNASi_merged)),
  tab_mRNASi_merged
)
tabSi <- tab_mRNASi_merged
tabSi$mRNASi_level <- as.character(tabSi$mRNASi_level)

# Samples below the lower / above the upper tertile are relabelled
lower_tertile <- quantile(tabSi$mRNASi, 1/3)
upper_tertile <- quantile(tabSi$mRNASi, 2/3)
tabSi[tabSi$mRNASi < lower_tertile, "mRNASi_level"] <- "mRNASi Low"
tabSi[tabSi$mRNASi > upper_tertile, "mRNASi_level"] <- "mRNASi High"

# Clinical data of both projects, limited to patients with a stemness score
require(TCGAbiolinks)
dataClin_LGG <- GDCquery_clinic(project = "TCGA-LGG", type = "clinical")
dataClin_GBM <- GDCquery_clinic(project = "TCGA-GBM", type = "clinical")
dataClin_LGG_GBM <- rbind(dataClin_LGG, dataClin_GBM)
has_score <- dataClin_LGG_GBM$submitter_id %in% tabSi$barcode12
dataClin_LGG_GBM <- dataClin_LGG_GBM[has_score, ]

dataClin_merged <- merge(
  x = dataClin_LGG_GBM,
  y = tabSi,
  by.x = "submitter_id",
  by.y = "barcode12"
)

# filename = NULL: the figure is saved with ggsave() below instead
p <- TCGAanalyze_survival(
  dataClin_merged,
  clusterCol = "mRNASi_level",
  conf.int = FALSE,
  main = "TCGA LGG GBM mRNASi",
  height = 10,
  width = 10,
  risk.table = TRUE,
  filename = NULL
)

# Keep only the curve panel of the returned object and move its legend
p <- p$plot + theme(legend.position = "bottom")
ggsave(p, filename = "TCGA_LGG_GBM_mRNASi_survival.png", width = 5, height = 6)
The result from the Stemness-survival association for LGG and GBM is shown below:
In this section we generate the immune subtypes for TCGA and GEO tumors. Figure 1C, adapted from Thorsson et al., Immunity, 2018, summarizes the features of the six different immune subtypes.
# Survival analysis of TCGA-KIRC patients grouped by immune subtype.
# The subtype calls are read from the supplementary Excel table downloaded
# below (columns used: `TCGA Study`, `Immune Subtype`,
# `TCGA Participant Barcode`).

# mode = "wb" downloads the workbook as a binary file; without it the file
# can be corrupted on Windows, where the default is a text-mode transfer.
download.file(url = "https://ars.els-cdn.com/content/image/1-s2.0-S1074761318301213-mmc2.xlsx",
              destfile = "X1_s2_0_S1074761318301213_mmc2",
              mode = "wb")

# read_excel() is called through its namespace because readxl is not attached
# anywhere in this chunk
X1_s2_0_S1074761318301213_mmc2 <- readxl::read_excel("X1_s2_0_S1074761318301213_mmc2")
X1_s2_0_S1074761318301213_mmc2 <- as.data.frame(X1_s2_0_S1074761318301213_mmc2)

CancerType <- c("KIRC")

# Keep the samples of the selected study that have an immune subtype call.
# Both a real missing value and the literal text "NA" are treated as
# "no call"; comparing with `!=` alone would turn real NAs into all-NA rows.
ImmuneSubtypes <- X1_s2_0_S1074761318301213_mmc2
ImmuneSubtypes <- ImmuneSubtypes[ImmuneSubtypes$`TCGA Study` %in% CancerType,]
has_subtype <- !is.na(ImmuneSubtypes$`Immune Subtype`) &
  ImmuneSubtypes$`Immune Subtype` != "NA"
ImmuneSubtypes <- ImmuneSubtypes[has_subtype,]

# Clinical data of the same study, joined on the patient barcode
dataClin_KIRC <- GDCquery_clinic(project = paste0("TCGA-", CancerType),
                                 type = "clinical")
dataClin_merged <- merge(x = dataClin_KIRC,
                         y = ImmuneSubtypes,
                         by.x = "submitter_id",
                         by.y = "TCGA Participant Barcode")

# filename = NULL: the figure is saved with ggsave() below instead
p <- TCGAanalyze_survival(dataClin_merged,
                          clusterCol = "Immune Subtype",
                          conf.int = FALSE,
                          main = "TCGA KIRC Immune Subtypes",
                          height = 10,
                          width = 10,
                          risk.table = TRUE,
                          filename = NULL)

# Keep only the curve panel of the returned object and move its legend
p <- p$plot
p <- p + theme(legend.position = "right")
ggsave(p, filename = "TCGA_KIRC_ImmuneSubtypes_survival.png", width = 10, height = 6)
The result from the Immune subtypes-survival association for KIRC samples is shown below:
# Immune subtype classification of the clear cell RCC samples of the GEO
# series GSE15641 (platform GPL96).
# NOTE(review): `ImmuneMW` is not created in this chunk; it must already be
# available in the session.
library(MoonlightR)
library(SummarizedExperiment)

#Jones J, Otu H, Spentzos D, Kolia S et al. Gene signatures of progression and metastasis in renal cell cancer.
# Clin Cancer Res 2005 Aug 15;11(16):5730-9. PMID: 16115910
dataGEO_KIRC <- getDataGEO(GEOobject = "GSE15641",
                           platform = "GPL96")

# Probe-level expression values plus the probe id and its mean across samples
GSE15641 <- as.data.frame(exprs(dataGEO_KIRC))
GSE15641_non_norm <- cbind(ILMN = rownames(GSE15641),
                           IDmean = rowMeans(GSE15641),
                           GSE15641)

# Probe annotation: probe id and gene symbol
GSE15641_annot <- fData(dataGEO_KIRC)
GSE15641_annot <- as.data.frame(GSE15641_annot)
GSE15641_annot <- subset(GSE15641_annot,
                         select = c("ID","Gene.symbol"))

# Sample annotation, restricted to the clear cell RCC tumor samples
dataGEO_samples <- pData(dataGEO_KIRC)
dataGEO_samples <- as.data.frame(dataGEO_samples)
dataGEO_samples <- subset(dataGEO_samples,
                          select = c("geo_accession","source_name_ch1"))
colnames(dataGEO_samples)[2] <- "CellType"
dataGEO_samples <- dataGEO_samples[dataGEO_samples$CellType %in% "cancerous human kidney tissue, clear cell RCC",]

GSE15641_merge <- merge(x = GSE15641_annot,
                        y = GSE15641_non_norm,
                        by.x = "ID",
                        by.y = "ILMN")

# For genes measured by several probes keep the probe with the highest mean:
# sort by decreasing mean, then keep the first occurrence of each symbol
GSE15641_merge <- GSE15641_merge[order(GSE15641_merge$IDmean,decreasing = TRUE),]
GSE15641_merge <- GSE15641_merge[!duplicated(GSE15641_merge$Gene.symbol),]

GSE15641_Matrix <- GSE15641_merge
rownames(GSE15641_Matrix) <- GSE15641_Matrix$Gene.symbol

# Select the sample columns by NAME. as.character() guards against
# geo_accession being a factor, in which case `[` would index by the integer
# level codes and silently pick the wrong columns.
GSE15641_Matrix <- GSE15641_Matrix[,as.character(dataGEO_samples$geo_accession)]

dataImmuneSubtype <- TCGAanalyze_ImmuneSubtypes(ImmuneMW = ImmuneMW,
                                                dataGE = GSE15641_Matrix)
To include some examples from http://bioconductor.org/packages/release/bioc/vignettes/MoonlightR/inst/doc/Moonlight.html
And some figures from https://www.sciencedirect.com/science/article/pii/S0092867418303131
# Print the R version, platform and package versions of the current session
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] png_0.1-7 TCGAbiolinks_2.13.1
## [3] dplyr_0.8.1 SummarizedExperiment_1.14.0
## [5] DelayedArray_0.10.0 BiocParallel_1.18.0
## [7] matrixStats_0.54.0 Biobase_2.44.0
## [9] GenomicRanges_1.36.0 GenomeInfoDb_1.20.0
## [11] IRanges_2.18.0 S4Vectors_0.22.0
## [13] BiocGenerics_0.30.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 selectr_0.4-1
## [3] rjson_0.2.20 hwriter_1.3.2
## [5] circlize_0.4.6 XVector_0.24.0
## [7] GlobalOptions_0.1.0 clue_0.3-57
## [9] ggpubr_0.2 matlab_1.0.2
## [11] ggrepel_0.8.1 bit64_0.9-7
## [13] AnnotationDbi_1.46.0 xml2_1.2.0
## [15] codetools_0.2-16 splines_3.6.0
## [17] R.methodsS3_1.7.1 doParallel_1.0.14
## [19] DESeq_1.36.0 geneplotter_1.62.0
## [21] knitr_1.23 jsonlite_1.6
## [23] Rsamtools_2.0.0 km.ci_0.5-2
## [25] broom_0.5.2 annotate_1.62.0
## [27] cluster_2.0.8 R.oo_1.22.0
## [29] readr_1.3.1 compiler_3.6.0
## [31] httr_1.4.0 backports_1.1.4
## [33] assertthat_0.2.1 Matrix_1.2-17
## [35] lazyeval_0.2.2 limma_3.40.0
## [37] htmltools_0.3.6 prettyunits_1.0.2
## [39] tools_3.6.0 gtable_0.3.0
## [41] glue_1.3.1 GenomeInfoDbData_1.2.1
## [43] ggthemes_4.2.0 ShortRead_1.42.0
## [45] Rcpp_1.0.1 Biostrings_2.52.0
## [47] nlme_3.1-139 rtracklayer_1.44.0
## [49] iterators_1.0.10 xfun_0.7
## [51] stringr_1.4.0 rvest_0.3.4
## [53] XML_3.98-1.19 edgeR_3.26.1
## [55] zoo_1.8-5 zlibbioc_1.30.0
## [57] scales_1.0.0 aroma.light_3.14.0
## [59] hms_0.4.2 RColorBrewer_1.1-2
## [61] ComplexHeatmap_2.0.0 yaml_2.2.0
## [63] memoise_1.1.0 gridExtra_2.3
## [65] KMsurv_0.1-5 ggplot2_3.1.1
## [67] downloader_0.4 biomaRt_2.40.0
## [69] latticeExtra_0.6-28 stringi_1.4.3
## [71] RSQLite_2.1.1 highr_0.8
## [73] genefilter_1.66.0 foreach_1.4.4
## [75] GenomicFeatures_1.36.0 shape_1.4.4
## [77] rlang_0.3.4 pkgconfig_2.0.2
## [79] bitops_1.0-6 evaluate_0.13
## [81] lattice_0.20-38 purrr_0.3.2
## [83] cmprsk_2.2-7 GenomicAlignments_1.20.0
## [85] bit_1.1-14 tidyselect_0.2.5
## [87] plyr_1.8.4 magrittr_1.5
## [89] R6_2.4.0 generics_0.0.2
## [91] DBI_1.0.0 mgcv_1.8-28
## [93] pillar_1.4.0 survival_2.44-1.1
## [95] RCurl_1.95-4.12 tibble_2.1.1
## [97] EDASeq_2.18.0 crayon_1.3.4
## [99] survMisc_0.5.5 rmarkdown_1.12
## [101] GetoptLong_0.1.7 progress_1.2.2
## [103] locfit_1.5-9.1 sva_3.32.0
## [105] data.table_1.12.2 blob_1.1.1
## [107] ConsensusClusterPlus_1.48.0 digest_0.6.18
## [109] xtable_1.8-4 tidyr_0.8.3
## [111] R.utils_2.8.0 munsell_0.5.0
## [113] survminer_0.4.3